In [3]:
# Distribute a small in-memory list across the cluster as an RDD.
# `sc` is the SparkContext, expected to be provided by the notebook environment.
rdd = sc.parallelize([1, 2, 3])
In [4]:
# Bare expression: the notebook shows the repr of the RDD handle itself,
# not the elements it contains.
rdd
Out[4]:
In [5]:
# count() is an action: it returns the number of elements in the RDD to the driver.
rdd.count()
Out[5]:
In [6]:
# map() is a transformation: it only describes a new RDD in which every
# element is incremented by one; nothing is computed until an action runs.
rdd2 = rdd.map(lambda value: value + 1)
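Lazy evaluation: transformations such as `map` are not executed immediately. Spark only records them, building a graph of the operations so it can work out the best way to apply all the functions once an action (e.g. `count` or `collect`) is called.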
In [10]:
# NOTE(review): the standard PySpark RDD API does not appear to provide
# `toSeries`; unless another library adds it, this call presumably raises
# AttributeError. Confirm, or build the series from `rdd.collect()` instead.
tryseries = rdd.toSeries()
In [13]:
# collect() is an action: it brings every element of the RDD back to the
# driver as a Python list, so it should only be used on small RDDs.
rdd.collect()
Out[13]:
In [14]:
# NOTE(review): in IPython `_` normally refers to the most recent cell output;
# binding it by hand shadows that convenience. Presumably a scratch
# experiment; confirm before keeping it in the notebook.
_=1
In [16]:
# Scratch assignment; `a` is not referenced by any other cell visible here.
a = 2
In [22]:
import numpy as np
In [ ]:
def func(x):
    """Return ``x`` incremented by one.

    Named equivalent of the ``lambda x: x+1`` style of mapper, so it can be
    passed to ``rdd.map`` directly.

    Args:
        x: Any value that supports ``+ 1`` (e.g. an int or a float).

    Returns:
        The result of ``x + 1``.
    """
    # The body must be indented under the ``def``; without the indent the
    # cell fails with an IndentationError before it can define anything.
    return x + 1
In [18]:
# Distribute a list of image file names and lazily map `load` over them.
# NOTE(review): `load` is not defined in any cell visible here. Presumably it
# is a user-supplied function that reads one file path; confirm it exists
# before running this cell.
files = ['a.png', 'b.png']
# Typo fixed: the SparkContext method is `parallelize`; the previous spelling
# `paralllize` raises AttributeError.
sc.parallelize(files).map(load)
In [26]:
# Rebind `rdd` to an RDD of (key, value) tuples.
# Note that this replaces the integer RDD created earlier under the same name.
rdd = sc.parallelize([
    (1, 'a'),
    (2, 'g'),
])
In [28]:
# Take only the keys of the pair RDD and fold them together with `+`,
# i.e. compute the sum of all keys. reduce() is an action, so this runs a job.
rdd.keys().reduce(lambda left, right: left + right)
Out[28]:
In [ ]: